*-------------------------------------------------------------------------------
*						Appendix IV Tab 2
*-------------------------------------------------------------------------------

** Project directories.
* These are absolute paths on the original author's machine; adjust them before
* running elsewhere. Only App_data, Work_lab and Out_lab are used in this file.
global Raw_data   "G:\project-finished\Descriptive\Data"
global App_data   "G:\project-finished\Descriptive\Appendix Data"
global Class_data "G:\project-finished\Descriptive\Classification"
global Work_lab   "G:\project-finished\Descriptive\Lab"
global Out_lab    "G:\project-finished\Descriptive\Out"

* Every file referenced below without a directory (tabledata*.dta, table.dta,
* occ2010_consistent.dta, CHN-consistent_ONETSOC2010.dta) is resolved relative
* to this working directory.
cd "$Work_lab"

* Close any log left open by a previous run, then start a fresh one.
capture log close
log using "$Out_lab\Appendix IV Tab 2", replace
set more off


**------------------------------------------------------------------------------
* Step1: Generate Data
*					(1)CFPS2010-2018
**------------------------------------------------------------------------------
* tabledata1: CFPS person-year panel with occupation, education and employment.
* An empty placeholder file is written first; it is appended back at the end,
* which adds no observations.
clear
set obs 0
save tabledata1, replace emptyok

* Occupation codes, reshaped to one row per person with one occ column per year
use "$App_data\CFPS\paneldata_person.dta", clear

keep pid year occ
reshape wide occ, i(pid) j(year)

* Demographic characteristics: keep only persons present in both files
merge 1:1 pid using "$App_data\CFPS\cfps2018crossyearid_202104.dta"
keep if _merge == 3
drop _merge

* Keep required variables
keep pid occ* birthy gender entrayear *edu *eduy employ*

* Rename wave-specific variables to stub + year suffix so that reshape long can
* stack them. By the rename rules below, a name of the form cfps2010edu would
* end up as edu10. The order of these five commands matters.
rename cfps* a*
rename a*edu edu*
rename a*eduy eduy*
rename edu20* edu*
rename eduy20* eduy*

* Back to long form: one row per person and year
reshape long occ edu eduy employ, i(pid birthy gender entrayear) j(year)

append using tabledata1
save tabledata1, replace


* tabledata2: O*NET V20 educational requirement per O*NET-SOC code.
* An empty placeholder is saved first and appended back at the end (no-op).
clear
set obs 0
save tabledata2, replace emptyok

import excel "$App_data\ONET N_version\onet20ac\Education Training and Experience.xlsx", sheet("Education__Training__and_Experi") firstrow case(lower) clear

* Show which elements the sheet contains, then keep the education-level rows
tabulate elementname
keep if elementname == "Required Level of Education"
keep onetsoccode category datavalue

* Convert the categorical level (1-12) into years of education.
* Categories 8, 9 and 10 all map to 18 years; any other value is left unchanged.
destring category, replace
recode category (1 = 10) (2 = 12) (3 = 13) (4 = 14) (5 = 15) (6 = 16) (7 = 17) (8 9 10 = 18) (11 = 19) (12 = 20)
rename category educationyear

* datavalue is the percentage of respondents in each category for the occupation.
* The occupation's required education is the percentage-weighted sum of years.
* The total is constant within onetsoccode, so the collapse just keeps one row
* per code.
bysort onetsoccode: egen education = total(educationyear*datavalue/100)
collapse (mean) education, by(onetsoccode)
label variable education "Years of education LTHS 10, HS 12, BA 16, MS 18, PHD 19"
sort onetsoccode

append using tabledata2
save tabledata2, replace


* tabledata3: mean O*NET educational requirement for each consistent occupation
* category, identified by consistent and title_consistent.
clear
set obs 0
save tabledata3, replace emptyok

use "occ2010_consistent.dta", clear

* Rows may repeat the same (consistent, title_consistent) key. Number the rows
* within each key and process one rank at a time, so that the master side of
* the 1:m merge is unique on the key inside every pass.
bysort consistent title_consistent: gen num = _n
levelsof num, local(rank_list)
foreach rank of local rank_list {
    preserve
        keep if num == `rank'

        * Crosswalk to O*NET-SOC 2010 codes, matched rows only
        merge 1:m consistent title_consistent using CHN-consistent_ONETSOC2010.dta
        keep if _merge == 3
        drop _merge

        * Attach the education requirement, matched rows only
        rename onet_code onetsoccode
        merge m:1 onetsoccode using tabledata2
        keep if _merge == 3
        drop _merge

        * Accumulate this pass into the running file
        append using tabledata3
        save tabledata3, replace
    restore
}

* Average over all linked O*NET codes and keep one row per category
use tabledata3, clear
bysort consistent title_consistent: egen education_index = mean(education)
keep consistent title_consistent education_index
duplicates drop

save tabledata3, replace

* tabledata4: educational requirement for each CFPS occupation code.
* Merge CFPS occupation with census occupation.
* CFPS2010-2012 @GBT 2009
* An empty placeholder is saved first and appended back at the end (no-op).
clear
set obs 0
save tabledata4,replace emptyok

import excel "$App_data\CFPS\CFPS2010职业编码对应表.xlsx", sheet("census2010职业编码对应表") firstrow allstring clear
drop if cfps12_occ==""

*Connect to Census: match on the occupation title, matched rows only
rename gb2009_occname title_2010
merge m:1 title_2010 using "occ2010_consistent.dta"
keep if _merge==3
drop _merge

*Get education characteristics
keep cfps12_occ cfps12_occname consistent title_consistent occ_1dig occ_2dig

merge m:1 consistent title_consistent using tabledata3
keep if _merge==3
drop _merge

*Deal with missing values: fill with the mean of the same occ_2dig group.
* The measure merged in from tabledata3 is named education_index. It is
* referenced by its full name so the code does not depend on Stata's
* variable-abbreviation setting.
* NOTE(review): only matched rows survive the merge above, so this fill can
* only act when education_index itself is missing in tabledata3. If unmatched
* occupations were meant to be imputed here, the merge would need to keep
* master-only rows - confirm the intent before changing, since that would
* alter the results.
bys occ_2dig:egen temp=mean(education_index)
replace education_index=temp if education_index==.
drop temp

*Finally get CFPS occ characteristics: one row per CFPS occupation code
collapse (mean) education_index, by(cfps12_occ cfps12_occname)
append using tabledata4
save tabledata4,replace


* CFPS cell counts: number of employed person-years by own education group,
* required education of the occupation, and survey year. Results start the
* accumulating file table.dta.
clear
set obs 0
save table,replace emptyok

use tabledata1.dta,clear

*Data mistake or unemployed
keep if employ==1
drop if occ==. | inlist(occ, 999999, -9, -8, -7, -2, -1)
drop if inlist(occ, 60552, 62706, 63509, 21107)

tostring occ, gen(cfps12_occ)
*Adjust cfps panel occ due to different category
replace cfps12_occ="10100" if inlist(cfps12_occ, "10000", "10101")
replace cfps12_occ="10501" if inlist(cfps12_occ, "10500", "10510", "10520", "10530")

* Attach the occupation education measure; unmatched panel rows are kept so
* they can be imputed below, rows that exist only in tabledata4 are dropped.
merge m:1 cfps12_occ using tabledata4.dta
drop if _merge==2
sort _merge occ

*Deal with missing value
* NOTE(review): education below resolves through Stata's variable abbreviation
* to the measure stored in tabledata4 (presumably education_index - confirm
* with describe). It stops working under set varabbrev off.
* First fill with the mean over occupations sharing the first three characters
* of the code, then with the mean over the first character.
gen occ_2dig=substr(cfps12_occ,1,3)
bys occ_2dig:egen temp=mean(education)
replace education=temp if education==.
drop temp

gen occ_1dig=substr(cfps12_occ,1,1)
bys occ_1dig:egen temp=mean(education)
replace education=temp if education==.
drop temp

* Remove rows that still have no education measure BEFORE coding required:
* a missing value compares as larger than any number, so it would otherwise
* satisfy the >=14 condition.
drop if education==.

*Occupation Edu: 1 = below 12 years, 2 = 12 to under 14, 3 = 14 or more
gen required=1 if education<12
replace required=2 if education>=12&education<14
replace required=3 if education>=14

*Demographics edu
drop if edu==-9
gen group=1 if inlist(edu, 1, 2, 3)
replace group=2 if edu==4
replace group=3 if inlist(edu, 5, 6, 7, 8)

* Rows whose edu is none of the codes 1-8 have no education group. Left in,
* they would form a cell with missing group, inflate the share denominators in
* Step 2 and break the reshape wide on group there.
drop if missing(group)

gen num=1
collapse (count) num,by(group required year)

append using table
save table,replace



**------------------------------------------------------------------------------
* Step1: Generate Data
*					(2)Women's Social Status 2010
**------------------------------------------------------------------------------
* Women's Social Status wave 3: cell counts by own education group and required
* education of the occupation, labelled year 2010 and added to table.dta.
use "$App_data\Women Status\wave3.dta",clear

keep ID B3A C1A C2 C18AA
rename B3A edu
rename C1A employ
rename C2 occ
rename C18AA wage

keep if employ==1

* Build the string occupation key used by occ2010_consistent.dta.
* Codes below 100 get one leading zero.
* NOTE(review): a code below 10 would become two characters only - confirm
* that no such codes occur.
gen occ2010=occ
tostring occ2010,replace
replace occ2010="0"+occ2010 if real(occ2010)<100
merge m:1 occ2010 using "occ2010_consistent.dta"
keep if _merge==3
drop _merge

keep ID edu wage consistent title_consistent occ_1dig occ_2dig

*merge with edu characteristics
* tabledata3 supplies education_index; it is referenced by its full name so
* the code does not depend on Stata's variable-abbreviation setting.
merge m:1 consistent title_consistent using tabledata3
keep if _merge==3
drop _merge

* A missing value compares as larger than any number and would be coded as
* required==3 below, so remove any such rows first.
drop if missing(education_index)

*Occupation Edu: 1 = below 12 years, 2 = 12 to under 14, 3 = 14 or more
gen required=1 if education_index<12
replace required=2 if education_index>=12&education_index<14
replace required=3 if education_index>=14

*Demographics edu
gen group=1 if inlist(edu, 1, 2, 3)
replace group=2 if inlist(edu, 4, 5)
replace group=3 if inlist(edu, 6, 7, 8)

* Rows whose edu is none of the codes 1-8 have no education group. Left in,
* they would form a cell with missing group, inflate the share denominators in
* Step 2 and break the reshape wide on group there.
drop if missing(group)

gen num=1
collapse (count) num,by(group required)
gen year=2010

append using table
save table,replace



**------------------------------------------------------------------------------
* Step1: Generate Data
*					(3)Women's Social Status 2000
**------------------------------------------------------------------------------
* tabledata5: mean O*NET educational requirement by the first two characters
* of the occ2010 code.
* An empty placeholder is saved first so the loop always has a file to append.
clear
set obs 0
save tabledata5, replace emptyok

use "occ2010_consistent.dta", clear

* Rows may repeat the same (consistent, title_consistent) key. Number the rows
* within each key and process one rank at a time, so that the master side of
* the 1:m merge is unique on the key inside every pass.
bysort consistent title_consistent: gen num = _n
levelsof num, local(rank_list)
foreach rank of local rank_list {
    preserve
        keep if num == `rank'

        * Crosswalk to O*NET-SOC 2010 codes, matched rows only
        merge 1:m consistent title_consistent using CHN-consistent_ONETSOC2010.dta
        keep if _merge == 3
        drop _merge

        * Attach the education requirement, matched rows only
        rename onet_code onetsoccode
        merge m:1 onetsoccode using tabledata2
        keep if _merge == 3
        drop _merge

        * Accumulate this pass into the running file
        append using tabledata5
        save tabledata5, replace
    restore
}

* Average over every linked row that shares the two-character prefix and keep
* one row per prefix
use tabledata5, clear
gen occ_20dig = substr(occ2010, 1, 2)
bysort occ_20dig: egen education_index = mean(education)

keep occ_20dig education_index
duplicates drop

save tabledata5, replace


*Use data
* Women's Social Status wave 2: cell counts by own education group and required
* education of the occupation, labelled year 2000 and added to table.dta.
use "$App_data\Women Status\wave2.dta",clear

keep c1_a c2 c18_a b4_a
gen id=_n
rename c1_a employ
rename c2 occ
rename c18_a wage
rename b4_a edu

drop if inlist(employ, 4, 9)
drop if employ==.
drop if occ==.

* Two-character occupation key: two-digit codes get a leading zero, then the
* first two characters are matched against tabledata5.
gen occ_temp=string(occ)
replace occ_temp="0"+occ_temp if strlen(occ_temp)==2
gen occ_20dig=substr(occ_temp,1,2)
merge m:1 occ_20dig using tabledata5
keep if _merge==3
drop _merge

* tabledata5 supplies education_index; it is referenced by its full name so
* the code does not depend on Stata's variable-abbreviation setting.
* A missing value compares as larger than any number and would be coded as
* required==3 below, so remove any such rows first.
drop if missing(education_index)

*Occupation Edu: 1 = below 12 years, 2 = 12 to under 14, 3 = 14 or more
gen required=1 if education_index<12
replace required=2 if education_index>=12&education_index<14
replace required=3 if education_index>=14

*Demographics edu
* NOTE(review): a missing edu is counted in the lowest group here, unlike the
* other two data sources in this file - kept as written, confirm it is intended.
gen group=1 if inlist(edu, 1, 2, 3) | edu==.
replace group=2 if inlist(edu, 4, 5)
replace group=3 if inlist(edu, 6, 7, 8)

* Rows whose non-missing edu is none of the codes 1-8 have no education group.
* Left in, they would form a cell with missing group, inflate the share
* denominators in Step 2 and break the reshape wide on group there.
drop if missing(group)

gen num=1
collapse (count) num,by(group required)

gen year=2000

append using table
save table,replace


**------------------------------------------------------------------------------
* Step2: Display Data			
**------------------------------------------------------------------------------
* Turn the cell counts into percentage shares of each education group within
* a (year, required) cell, list the table, and remove the intermediate files.
use table,clear

* A cell without an education group cannot become a column in the reshape
* below and would distort the denominators, so it is excluded here.
drop if missing(group)

bys year required:egen pop=total(num)
gen share=num/pop*100

drop pop num
reshape wide share,i(required year) j(group)

* Rows still carrying a year below 2000 at this point can only come from the
* CFPS panel, because both Women's Social Status blocks assign 2000 or 2010
* explicitly. Record the origin before the year is rescaled: afterwards both
* sources have a row for 2010 and could not be told apart.
gen source=cond(year<2000, "CFPS", "Women's Social Status")
replace year=2000+year if year<2000

* source is part of the sort key so the two 2010 rows always appear in the
* same order
sort required year source
order required year source
format share* %9.2f

list _all

erase table.dta
erase tabledata1.dta
erase tabledata2.dta
erase tabledata3.dta
erase tabledata4.dta
erase tabledata5.dta


log close 





